#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')
library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))
Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_21_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]
# Correct SFARI Scores
dataset$gene.score = genes_info$gene.score
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)
Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.
datTraits = datMeta %>% dplyr::select(Diagnosis, Region, Sex, Age, PMI, RNAExtractionBatch) %>%
dplyr::rename('ExtractionBatch' = RNAExtractionBatch)
# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)
# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated'))
}
rm(ME_object)
I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them
# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = gsub('ME','',rownames(moduleTraitCor)),
yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
main = paste('Module-Trait relationships'))
diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
'MTcor' = moduleTraitCor[,'Diagnosis'],
'MTpval' = moduleTraitPvalue[,'Diagnosis'])
genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')
rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)
The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules
top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])
cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #73B000, #EE8045, #00B9E3, #84AD00
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
gene_id = paste0(ID, ' (', external_gene_id, ')'))
table(plot_data$ImportantModules)
##
## #00B9E3 #73B000 #84AD00 #EE8045 Others
## 1733 1461 891 884 11178
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)
create_plot = function(module){
plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
colnames(plot_data)[2] = 'Module'
SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
SFARI_colors = SFARI_colors[!is.na(SFARI_colors)]
p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,5,6))) + theme_minimal() + xlab('Module Membership') +
ggtitle(paste0('Module ', module,' (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
return(p)
}
create_plot(top_modules[1])
create_plot(top_modules[2])
create_plot(top_modules[3])
create_plot(top_modules[4])
rm(create_plot)
List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance
table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id,
'SFARI score'=gene.score, 'Gene Significance'=GS)
kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000158321 | AUTS2 | 0.5297062 | 1 |
| ENSG00000132510 | KDM6B | 0.3312280 | 1 |
| ENSG00000165186 | PTCHD1 | 0.3027356 | 1 |
| ENSG00000151067 | CACNA1C | 0.2124455 | 1 |
| ENSG00000108001 | EBF3 | 0.1835242 | 1 |
| ENSG00000168036 | CTNNB1 | 0.1598633 | 1 |
| ENSG00000189056 | RELN | -0.1016272 | 1 |
| ENSG00000083168 | KAT6A | 0.5999494 | 2 |
| ENSG00000205581 | HMGN1 | 0.5655605 | 2 |
| ENSG00000259207 | ITGB3 | 0.4676222 | 2 |
| ENSG00000169432 | SCN9A | 0.4140399 | 2 |
| ENSG00000149571 | KIRREL3 | 0.3970895 | 2 |
| ENSG00000113742 | CPEB4 | 0.3724287 | 2 |
| ENSG00000196839 | ADA | 0.3686762 | 2 |
| ENSG00000146938 | NLGN4X | 0.2547162 | 2 |
| ENSG00000118432 | CNR1 | 0.2248206 | 2 |
| ENSG00000166148 | AVPR1A | 0.1138557 | 2 |
| ENSG00000044090 | CUL7 | 0.0391425 | 2 |
| ENSG00000151623 | NR3C2 | -0.0150334 | 2 |
| ENSG00000165097 | KDM1B | 0.8228002 | 3 |
| ENSG00000106366 | SERPINE1 | 0.7911667 | 3 |
| ENSG00000011422 | PLAUR | 0.7765401 | 3 |
| ENSG00000152208 | GRID2 | 0.6205906 | 3 |
| ENSG00000073756 | PTGS2 | 0.5256987 | 3 |
| ENSG00000137713 | PPP2R1B | 0.5152266 | 3 |
| ENSG00000181744 | C3orf58 | 0.5078650 | 3 |
| ENSG00000234745 | HLA-B | 0.4927415 | 3 |
| ENSG00000157554 | ERG | 0.4900669 | 3 |
| ENSG00000068024 | HDAC4 | 0.4764332 | 3 |
| ENSG00000182372 | CLN8 | 0.3979418 | 3 |
| ENSG00000070371 | CLTCL1 | 0.3914968 | 3 |
| ENSG00000170836 | PPM1D | 0.3912223 | 3 |
| ENSG00000071246 | VASH1 | 0.3805608 | 3 |
| ENSG00000196277 | GRM7 | 0.3762334 | 3 |
| ENSG00000251664 | PCDHA12 | 0.3677659 | 3 |
| ENSG00000070366 | SMG6 | 0.3602051 | 3 |
| ENSG00000156564 | LRFN2 | 0.3558337 | 3 |
| ENSG00000215045 | GRID2IP | 0.2860911 | 3 |
| ENSG00000158089 | GALNT14 | 0.2811434 | 3 |
| ENSG00000184903 | IMMP2L | 0.1975311 | 3 |
| ENSG00000128833 | MYO5C | 0.1820315 | 3 |
| ENSG00000173338 | KCNK7 | 0.1804904 | 3 |
| ENSG00000157890 | MEGF11 | 0.1801030 | 3 |
| ENSG00000156011 | PSD3 | 0.1754519 | 3 |
| ENSG00000184845 | DRD1 | 0.1669913 | 3 |
| ENSG00000130508 | PXDN | 0.1229290 | 3 |
| ENSG00000187957 | DNER | 0.0572460 | 3 |
| ENSG00000188277 | C15orf62 | 0.0452293 | 3 |
| ENSG00000187546 | AGMO | -0.0249874 | 3 |
| ENSG00000127329 | PTPRB | -0.0195590 | 3 |
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000181090 | EHMT1 | 0.7732158 | 1 |
| ENSG00000110066 | SUV420H1 | 0.6536202 | 1 |
| ENSG00000038382 | TRIO | 0.6260413 | 1 |
| ENSG00000148143 | ZNF462 | 0.6188215 | 1 |
| ENSG00000165671 | NSD1 | 0.5413122 | 1 |
| ENSG00000141431 | ASXL3 | 0.5017802 | 1 |
| ENSG00000145348 | TBCK | 0.2948037 | 1 |
| ENSG00000153234 | NR4A2 | 0.2464660 | 1 |
| ENSG00000102974 | CTCF | 0.1270874 | 1 |
| ENSG00000168769 | TET2 | 0.8170356 | 2 |
| ENSG00000162946 | DISC1 | 0.6648966 | 2 |
| ENSG00000141027 | NCOR1 | 0.4643907 | 2 |
| ENSG00000197724 | PHF2 | 0.3656794 | 2 |
| ENSG00000145020 | AMT | 0.3410208 | 2 |
| ENSG00000204764 | RANBP17 | 0.2998930 | 2 |
| ENSG00000101004 | NINL | 0.2768068 | 2 |
| ENSG00000130940 | CASZ1 | 0.2636101 | 2 |
| ENSG00000198218 | QRICH1 | 0.2571314 | 2 |
| ENSG00000128573 | FOXP2 | 0.2493147 | 2 |
| ENSG00000070047 | PHRF1 | 0.2458754 | 2 |
| ENSG00000112902 | SEMA5A | 0.2184377 | 2 |
| ENSG00000165995 | CACNB2 | 0.1700655 | 2 |
| ENSG00000172780 | RAB43 | 0.1656377 | 2 |
| ENSG00000171988 | JMJD1C | 0.7508001 | 3 |
| ENSG00000172554 | SNTG2 | 0.7352646 | 3 |
| ENSG00000184226 | PCDH9 | 0.7208942 | 3 |
| ENSG00000181481 | RNF135 | 0.6744660 | 3 |
| ENSG00000227184 | EPPK1 | 0.5949514 | 3 |
| ENSG00000188785 | ZNF548 | 0.5799337 | 3 |
| ENSG00000131042 | LILRB2 | 0.4087052 | 3 |
| ENSG00000131374 | TBC1D5 | 0.4036662 | 3 |
| ENSG00000179094 | PER1 | 0.3936445 | 3 |
| ENSG00000133216 | EPHB2 | 0.3498022 | 3 |
| ENSG00000239389 | PCDHA13 | 0.3411571 | 3 |
| ENSG00000196367 | TRRAP | 0.3398825 | 3 |
| ENSG00000115556 | PLCD4 | 0.3366430 | 3 |
| ENSG00000007314 | SCN4A | 0.3324757 | 3 |
| ENSG00000150275 | PCDH15 | 0.3322272 | 3 |
| ENSG00000109743 | BST1 | 0.3278767 | 3 |
| ENSG00000182472 | CAPN12 | 0.3164282 | 3 |
| ENSG00000224389 | C4B | 0.3072172 | 3 |
| ENSG00000133665 | DYDC2 | 0.2718275 | 3 |
| ENSG00000169252 | ADRB2 | 0.2100597 | 3 |
| ENSG00000174332 | GLIS1 | 0.2068267 | 3 |
| ENSG00000178665 | ZNF713 | 0.2014021 | 3 |
| ENSG00000135999 | EPC2 | 0.1758127 | 3 |
| ENSG00000154957 | ZNF18 | 0.1212498 | 3 |
| ENSG00000196391 | ZNF774 | 0.1120060 | 3 |
| ENSG00000189283 | FHIT | 0.0671324 | 3 |
| ENSG00000166736 | HTR3A | 0.0604890 | 3 |
kable(table_data %>% filter(Module == top_modules[3] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[3]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000144285 | SCN1A | -0.8000233 | 1 |
| ENSG00000196876 | SCN8A | -0.7887782 | 1 |
| ENSG00000158445 | KCNB1 | -0.7188371 | 1 |
| ENSG00000061676 | NCKAP1 | -0.6906433 | 1 |
| ENSG00000021645 | NRXN3 | -0.6715730 | 1 |
| ENSG00000136535 | TBR1 | -0.6536200 | 1 |
| ENSG00000136531 | SCN2A | -0.5374597 | 1 |
| ENSG00000145362 | ANK2 | -0.4367876 | 1 |
| ENSG00000172915 | NBEA | -0.4147304 | 1 |
| ENSG00000050030 | KIAA2022 | -0.4001380 | 1 |
| ENSG00000119866 | BCL11A | -0.3844250 | 1 |
| ENSG00000008086 | CDKL5 | -0.3642207 | 1 |
| ENSG00000174775 | HRAS | -0.3233708 | 1 |
| ENSG00000114861 | FOXP1 | 0.2551455 | 1 |
| ENSG00000198963 | RORB | -0.1988853 | 1 |
| ENSG00000036257 | CUL3 | -0.1935419 | 1 |
| ENSG00000134138 | MEIS2 | -0.0195493 | 1 |
| ENSG00000010818 | HIVEP2 | 0.0164513 | 1 |
| ENSG00000074590 | NUAK1 | -0.8235832 | 2 |
| ENSG00000170579 | DLGAP1 | -0.7789832 | 2 |
| ENSG00000132294 | EFR3A | -0.7500147 | 2 |
| ENSG00000078328 | RBFOX1 | -0.7311006 | 2 |
| ENSG00000003147 | ICA1 | -0.7126337 | 2 |
| ENSG00000197535 | MYO5A | -0.7116545 | 2 |
| ENSG00000175497 | DPP10 | -0.7059507 | 2 |
| ENSG00000174469 | CNTNAP2 | -0.7002690 | 2 |
| ENSG00000182621 | PLCB1 | -0.6917670 | 2 |
| ENSG00000166501 | PRKCB | -0.6151170 | 2 |
| ENSG00000139726 | DENR | -0.5936537 | 2 |
| ENSG00000142230 | SAE1 | -0.5499380 | 2 |
| ENSG00000183454 | GRIN2A | -0.5288764 | 2 |
| ENSG00000185345 | PARK2 | -0.4980373 | 2 |
| ENSG00000164506 | STXBP5 | -0.4854943 | 2 |
| ENSG00000182771 | GRID1 | -0.4477343 | 2 |
| ENSG00000132024 | CC2D1A | -0.4158439 | 2 |
| ENSG00000157445 | CACNA2D3 | -0.3393135 | 2 |
| ENSG00000144619 | CNTN4 | -0.3109404 | 2 |
| ENSG00000185008 | ROBO2 | -0.3078622 | 2 |
| ENSG00000182256 | GABRG3 | -0.2878943 | 2 |
| ENSG00000139174 | PRICKLE1 | -0.2154883 | 2 |
| ENSG00000134115 | CNTN6 | -0.1945703 | 2 |
| ENSG00000166206 | GABRB3 | -0.1793562 | 2 |
| ENSG00000138411 | HECW2 | -0.1360240 | 2 |
| ENSG00000120251 | GRIA2 | -0.1278103 | 2 |
| ENSG00000149970 | CNKSR2 | 0.0921242 | 2 |
| ENSG00000140945 | CDH13 | 0.0846523 | 2 |
| ENSG00000166147 | FBN1 | -0.0830677 | 2 |
| ENSG00000171723 | GPHN | -0.0744071 | 2 |
| ENSG00000149972 | CNTN5 | -0.0486109 | 2 |
| ENSG00000144406 | UNC80 | -0.8619629 | 3 |
| ENSG00000132639 | SNAP25 | -0.8102863 | 3 |
| ENSG00000163618 | CADPS | -0.8086036 | 3 |
| ENSG00000035687 | ADSS | -0.7983791 | 3 |
| ENSG00000159082 | SYNJ1 | -0.7857393 | 3 |
| ENSG00000104093 | DMXL2 | -0.7626271 | 3 |
| ENSG00000170113 | NIPA1 | -0.7171001 | 3 |
| ENSG00000172260 | NEGR1 | -0.7076941 | 3 |
| ENSG00000163399 | ATP1A1 | -0.6892969 | 3 |
| ENSG00000115840 | SLC25A12 | -0.6828484 | 3 |
| ENSG00000196090 | PTPRT | -0.6741994 | 3 |
| ENSG00000130477 | UNC13A | -0.6664444 | 3 |
| ENSG00000154263 | ABCA10 | -0.6424383 | 3 |
| ENSG00000153575 | TUBGCP5 | -0.6291099 | 3 |
| ENSG00000136928 | GABBR2 | -0.6249389 | 3 |
| ENSG00000144290 | SLC4A10 | -0.5758631 | 3 |
| ENSG00000186094 | AGBL4 | -0.5383835 | 3 |
| ENSG00000144331 | ZNF385B | -0.5378671 | 3 |
| ENSG00000196433 | ASMT | -0.5322497 | 3 |
| ENSG00000144893 | MED12L | -0.5311931 | 3 |
| ENSG00000155961 | RAB39B | -0.5253969 | 3 |
| ENSG00000116141 | MARK1 | -0.5059225 | 3 |
| ENSG00000198010 | DLGAP2 | -0.4955545 | 3 |
| ENSG00000155886 | SLC24A2 | -0.4942000 | 3 |
| ENSG00000109158 | GABRA4 | -0.4937671 | 3 |
| ENSG00000243232 | PCDHAC2 | -0.4783915 | 3 |
| ENSG00000175161 | CADM2 | -0.4554469 | 3 |
| ENSG00000157152 | SYN2 | -0.4517946 | 3 |
| ENSG00000162631 | NTNG1 | -0.4511032 | 3 |
| ENSG00000182901 | RGS7 | -0.4464033 | 3 |
| ENSG00000133019 | CHRM3 | -0.4269528 | 3 |
| ENSG00000156113 | KCNMA1 | -0.4056474 | 3 |
| ENSG00000081189 | MEF2C | -0.3994354 | 3 |
| ENSG00000114374 | USP9Y | -0.3530437 | 3 |
| ENSG00000135312 | HTR1B | -0.3456140 | 3 |
| ENSG00000174442 | ZWILCH | -0.3290704 | 3 |
| ENSG00000136653 | RASSF5 | -0.3278436 | 3 |
| ENSG00000152495 | CAMK4 | -0.3173632 | 3 |
| ENSG00000060140 | STYK1 | -0.3142629 | 3 |
| ENSG00000165355 | FBXO33 | -0.3044234 | 3 |
| ENSG00000183117 | CSMD1 | -0.2927903 | 3 |
| ENSG00000170289 | CNGB3 | -0.2731971 | 3 |
| ENSG00000119125 | GDA | -0.2696689 | 3 |
| ENSG00000139915 | MDGA2 | -0.2435673 | 3 |
| ENSG00000042781 | USH2A | 0.2419483 | 3 |
| ENSG00000152402 | GUCY1A2 | -0.2325757 | 3 |
| ENSG00000159640 | ACE | -0.1962668 | 3 |
| ENSG00000185344 | ATP6V0A2 | -0.1924963 | 3 |
| ENSG00000150394 | CDH8 | -0.1866258 | 3 |
| ENSG00000169083 | AR | 0.1691410 | 3 |
| ENSG00000004468 | CD38 | -0.1689250 | 3 |
| ENSG00000215018 | COL28A1 | -0.1475973 | 3 |
| ENSG00000128513 | POT1 | -0.1415512 | 3 |
| ENSG00000244588 | RAD21L1 | -0.1256962 | 3 |
| ENSG00000165379 | LRFN5 | -0.1220706 | 3 |
| ENSG00000100038 | TOP3B | -0.1030750 | 3 |
| ENSG00000163873 | GRIK3 | 0.0933052 | 3 |
| ENSG00000128512 | DOCK4 | -0.0898793 | 3 |
| ENSG00000134909 | ARHGAP32 | 0.0825189 | 3 |
| ENSG00000204632 | HLA-G | -0.0733823 | 3 |
| ENSG00000187775 | DNAH17 | 0.0712448 | 3 |
| ENSG00000165246 | NLGN4Y | -0.0681445 | 3 |
| ENSG00000181036 | FCRL6 | 0.0606235 | 3 |
kable(table_data %>% filter(Module == top_modules[4] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[4]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000139613 | SMARCC2 | -0.4329730 | 1 |
| ENSG00000171759 | PAH | -0.6896875 | 2 |
| ENSG00000169918 | OTUD7A | -0.4706156 | 2 |
| ENSG00000106290 | TAF6 | -0.4144602 | 2 |
| ENSG00000196557 | CACNA1H | -0.4127420 | 2 |
| ENSG00000133026 | MYH10 | -0.3262873 | 2 |
| ENSG00000101489 | CELF4 | -0.3169738 | 2 |
| ENSG00000180914 | OXTR | -0.1668994 | 2 |
| ENSG00000135631 | RAB11FIP5 | -0.8340525 | 3 |
| ENSG00000128594 | LRRC4 | -0.4715288 | 3 |
| ENSG00000156110 | ADK | -0.4684323 | 3 |
| ENSG00000130731 | C16orf13 | -0.4412635 | 3 |
| ENSG00000117594 | HSD11B1 | -0.3659904 | 3 |
| ENSG00000160862 | AZGP1 | -0.3630511 | 3 |
| ENSG00000064687 | ABCA7 | -0.3146496 | 3 |
| ENSG00000135127 | CCDC64 | -0.3057298 | 3 |
| ENSG00000185115 | NDNL2 | -0.2931715 | 3 |
| ENSG00000115421 | PAPOLG | -0.2389588 | 3 |
| ENSG00000111615 | KRR1 | -0.2227080 | 3 |
| ENSG00000100359 | SGSM3 | -0.2078293 | 3 |
| ENSG00000101883 | RHOXF1 | -0.1923108 | 3 |
| ENSG00000131653 | TRAF7 | -0.1326903 | 3 |
| ENSG00000138650 | PCDH10 | -0.1102729 | 3 |
| ENSG00000213923 | CSNK1E | 0.0323072 | 3 |
Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case (even the opposite may be true)
plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>%
group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>%
mutate(p=round(p/n*100,2))
for(i in 1:nrow(plot_data)){
this_row = plot_data[i,]
if(this_row$hasSFARIscore==FALSE & this_row$p==100){
new_row = this_row
new_row$hasSFARIscore = TRUE
new_row$p = 0
plot_data = plot_data %>% rbind(new_row)
}
}
plot_data = plot_data %>% filter(hasSFARIscore==TRUE)
ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)
Breaking the SFARI genes by score
scores = c(1,2,3,'None')
plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>%
mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))
for(i in 1:nrow(plot_data)){
this_row = plot_data[i,]
if(sum(plot_data$Module == this_row$Module)<7){
missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
for(s in missing_scores){
new_row = this_row
new_row$gene.score = as.character(s)
new_row$n = 0
new_row$p = 0
plot_data = plot_data %>% rbind(new_row)
}
}
}
plot_data = plot_data %>% filter(gene.score != 'None')
plot_function = function(i){
i = 2*i-1
plot_list = list()
for(j in 1:2){
plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) +
geom_smooth(color='gray', se=FALSE) +
geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
theme_minimal() + theme(legend.position = 'none'))
}
p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
return(p)
}
plot_function(1)
plot_function(2)
rm(i, s, this_row, new_row, plot_function)
Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.
In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!
plot_EGs = function(module){
plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)
p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
ggtitle(paste0('Module ', module, ' (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
return(p)
}
p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
p3 = plot_EGs(top_modules[3])
p4 = plot_EGs(top_modules[4])
grid.arrange(p1, p2, p3, p4, nrow=2)
rm(plot_EGs, p1, p2, p3, p4)
Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance
*Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
return(top_genes)
}
top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])
kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000143384 | MCL1 | 0.8500241 | 0.9078060 | None | 0.8789151 |
| ENSG00000158615 | PPP1R15B | 0.8407717 | 0.8212537 | None | 0.8310127 |
| ENSG00000196935 | SRGAP1 | 0.7175838 | 0.9224538 | None | 0.8200188 |
| ENSG00000161638 | ITGA5 | 0.7550781 | 0.8597979 | None | 0.8074380 |
| ENSG00000148841 | ITPRIP | 0.7913310 | 0.8161802 | None | 0.8037556 |
| ENSG00000120278 | PLEKHG1 | 0.7857882 | 0.8086459 | None | 0.7972171 |
| ENSG00000133639 | BTG1 | 0.7819379 | 0.8082183 | None | 0.7950781 |
| ENSG00000101493 | ZNF516 | 0.6925114 | 0.8811710 | None | 0.7868412 |
| ENSG00000150457 | LATS2 | 0.7097060 | 0.8499030 | None | 0.7798045 |
| ENSG00000097007 | ABL1 | 0.7612757 | 0.7941289 | None | 0.7777023 |
| ENSG00000253731 | PCDHGA6 | 0.7450788 | 0.8036535 | None | 0.7743662 |
| ENSG00000106366 | SERPINE1 | 0.7535342 | 0.7911667 | 3 | 0.7723505 |
| ENSG00000198795 | ZNF521 | 0.7862020 | 0.7397450 | None | 0.7629735 |
| ENSG00000154640 | BTG3 | 0.6921295 | 0.8306872 | None | 0.7614084 |
| ENSG00000138166 | DUSP5 | 0.7541208 | 0.7643020 | None | 0.7592114 |
| ENSG00000072364 | AFF4 | 0.7413482 | 0.7716602 | None | 0.7565042 |
| ENSG00000204569 | PPP1R10 | 0.7470986 | 0.7645752 | None | 0.7558369 |
| ENSG00000135913 | USP37 | 0.6855044 | 0.8254166 | None | 0.7554605 |
| ENSG00000197329 | PELI1 | 0.7612944 | 0.7481606 | None | 0.7547275 |
| ENSG00000130222 | GADD45G | 0.6503873 | 0.8500145 | None | 0.7502009 |
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000003402 | CFLAR | 0.8349806 | 0.8167723 | None | 0.8258764 |
| ENSG00000138119 | MYOF | 0.8095979 | 0.8334501 | None | 0.8215240 |
| ENSG00000162745 | OLFML2B | 0.7794224 | 0.8526613 | None | 0.8160419 |
| ENSG00000089159 | PXN | 0.7551603 | 0.8687338 | None | 0.8119471 |
| ENSG00000124782 | RREB1 | 0.7602334 | 0.8444107 | None | 0.8023221 |
| ENSG00000198185 | ZNF334 | 0.8308122 | 0.7574061 | None | 0.7941092 |
| ENSG00000168769 | TET2 | 0.7660493 | 0.8170356 | 2 | 0.7915424 |
| ENSG00000120690 | ELF1 | 0.7858080 | 0.7870686 | None | 0.7864383 |
| ENSG00000163629 | PTPN13 | 0.7149171 | 0.8558172 | Neuronal | 0.7853671 |
| ENSG00000183808 | RBM12B | 0.6787244 | 0.8906825 | None | 0.7847035 |
| ENSG00000104870 | FCGRT | 0.6702519 | 0.8567333 | None | 0.7634926 |
| ENSG00000171988 | JMJD1C | 0.7726856 | 0.7508001 | 3 | 0.7617428 |
| ENSG00000117000 | RLF | 0.8221159 | 0.7009312 | None | 0.7615236 |
| ENSG00000173110 | HSPA6 | 0.7142783 | 0.8087644 | None | 0.7615214 |
| ENSG00000180596 | HIST1H2BC | 0.7506019 | 0.7717162 | None | 0.7611591 |
| ENSG00000102531 | FNDC3A | 0.7265045 | 0.7897408 | None | 0.7581227 |
| ENSG00000110719 | TCIRG1 | 0.6831133 | 0.8306871 | None | 0.7569002 |
| ENSG00000130066 | SAT1 | 0.7490607 | 0.7561901 | None | 0.7526254 |
| ENSG00000065978 | YBX1 | 0.6670100 | 0.8339125 | None | 0.7504612 |
| ENSG00000073792 | IGF2BP2 | 0.6578041 | 0.8387673 | None | 0.7482857 |
kable(top_genes[[3]], caption=paste0('Top 10 genes for module ', top_modules[3], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[3]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000050748 | MAPK9 | 0.9018732 | -0.9400140 | Neuronal | 0.9209436 |
| ENSG00000108395 | TRIM37 | 0.9037848 | -0.9297931 | None | 0.9167889 |
| ENSG00000138078 | PREPL | 0.9001507 | -0.9038232 | None | 0.9019869 |
| ENSG00000177432 | NAP1L5 | 0.8683153 | -0.9048326 | None | 0.8865739 |
| ENSG00000155097 | ATP6V1C1 | 0.8612597 | -0.8966476 | None | 0.8789537 |
| ENSG00000114573 | ATP6V1A | 0.8322001 | -0.9172599 | None | 0.8747300 |
| ENSG00000163577 | EIF5A2 | 0.8346941 | -0.9108048 | None | 0.8727495 |
| ENSG00000128881 | TTBK2 | 0.8697142 | -0.8751661 | None | 0.8724401 |
| ENSG00000171132 | PRKCE | 0.8338660 | -0.9088992 | None | 0.8713826 |
| ENSG00000111674 | ENO2 | 0.8876355 | -0.8482909 | Neuronal | 0.8679632 |
| ENSG00000176490 | DIRAS1 | 0.8331173 | -0.8967040 | None | 0.8649106 |
| ENSG00000131437 | KIF3A | 0.8801280 | -0.8462276 | Neuronal | 0.8631778 |
| ENSG00000196876 | SCN8A | 0.9339299 | -0.7887782 | 1 | 0.8613540 |
| ENSG00000125814 | NAPB | 0.8865175 | -0.8311321 | None | 0.8588248 |
| ENSG00000172348 | RCAN2 | 0.7982100 | -0.9149243 | None | 0.8565672 |
| ENSG00000184368 | MAP7D2 | 0.8249437 | -0.8801442 | None | 0.8525440 |
| ENSG00000162694 | EXTL2 | 0.8007142 | -0.8982366 | None | 0.8494754 |
| ENSG00000130540 | SULT4A1 | 0.8815880 | -0.8113335 | None | 0.8464608 |
| ENSG00000144285 | SCN1A | 0.8893987 | -0.8000233 | 1 | 0.8447110 |
| ENSG00000132639 | SNAP25 | 0.8756003 | -0.8102863 | 3 | 0.8429433 |
kable(top_genes[[4]], caption=paste0('Top 10 genes for module ', top_modules[4], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[4]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000106683 | LIMK1 | 0.8336101 | -0.8623672 | Neuronal | 0.8479886 |
| ENSG00000141576 | RNF157 | 0.8343007 | -0.8485893 | None | 0.8414450 |
| ENSG00000127838 | PNKD | 0.8393553 | -0.8333891 | None | 0.8363722 |
| ENSG00000140854 | KATNB1 | 0.8646680 | -0.7907368 | Neuronal | 0.8277024 |
| ENSG00000148334 | PTGES2 | 0.8037964 | -0.8316634 | None | 0.8177299 |
| ENSG00000006432 | MAP3K9 | 0.7465523 | -0.8872405 | None | 0.8168964 |
| ENSG00000130725 | UBE2M | 0.7814384 | -0.8501315 | Neuronal | 0.8157850 |
| ENSG00000105251 | SHD | 0.7661369 | -0.8630235 | None | 0.8145802 |
| ENSG00000139190 | VAMP1 | 0.8258018 | -0.8015636 | Neuronal | 0.8136827 |
| ENSG00000168993 | CPLX1 | 0.7956338 | -0.8267869 | Neuronal | 0.8112104 |
| ENSG00000183837 | PNMA3 | 0.7999070 | -0.8021580 | None | 0.8010325 |
| ENSG00000180155 | LYNX1 | 0.7934936 | -0.8012133 | None | 0.7973535 |
| ENSG00000124191 | TOX2 | 0.7739545 | -0.8098081 | None | 0.7918813 |
| ENSG00000139637 | C12orf10 | 0.7420227 | -0.8338129 | None | 0.7879178 |
| ENSG00000196972 | LINC00087 | 0.8285660 | -0.7454300 | None | 0.7869980 |
| ENSG00000135631 | RAB11FIP5 | 0.7285209 | -0.8340525 | 3 | 0.7812867 |
| ENSG00000174238 | PITPNA | 0.6804206 | -0.8774836 | None | 0.7789521 |
| ENSG00000106789 | CORO2A | 0.7260306 | -0.8209005 | None | 0.7734656 |
| ENSG00000196177 | ACADSB | 0.7340860 | -0.8051786 | None | 0.7696323 |
| ENSG00000127445 | PIN1 | 0.7966748 | -0.7363993 | None | 0.7665370 |
rm(create_table)
pca = datExpr %>% prcomp
ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules &
ID %in% ids, 1, 0.1))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
theme_minimal() + ggtitle('Important genes identified through WGCNA')
Level of expression by Diagnosis for top genes, ordered by importance (defined above)
create_plot = function(i){
plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>%
melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
left_join(top_genes[[i]], by='ID') %>%
left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id,
levels=unique(plot_data$external_gene_id), ordered=T)) %>%
ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
return(p)
}
create_plot(1)
create_plot(2)
create_plot(3)
create_plot(4)
rm(create_plot)
Using the package anRichment
It was designed by Peter Langfelder explicitly to perform enrichmen analysis on WGCNA’s modules in brain-related experiments (mainly Huntington’s Disease)
It has packages with brain annotations:
BrainDiseaseCollection: A Brain Disease Gene Set Collection for anRichment
MillerAIBSCollection: (included in anRichment) Contains gene sets collected by Jeremy A. Miller at AIBS of various cell type and brain region marker sets, gene sets collected from expression studies of developing brain, as well as a collection of transcription factor (TF) targets from the original ChEA study
The tutorial says it’s an experimental package
It’s not on CRAN nor in Bioconductor
file_name = './../Data/enrichmentAnalysis.RData'
if(file.exists('./../Data/enrichmentAnalysis.RData__________________________________________________________________________________________________________')){
load(file_name)
} else {
# Prepare dataset
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID, module = genes_info$Module) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')
for(tm in top_modules){
cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
tm, ' don\'t have an Entrez Gene ID'))
}
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf
collectGarbage()
# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])
# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)
# Print collections used
cat('Using collections: ')
knownGroups(combined_col, sortBy = 'size')
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
refCollection = combined_col, #useBackground = 'given',
threshold = 1e-4, thresholdType = 'Bonferroni',
getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
# Save enrichment results
save(enrichment, file=file_name)
}
## Batch submitting query [==>----------------------------] 9% eta: 4s Batch
## submitting query [===>---------------------------] 12% eta: 5s Batch
## submitting query [====>--------------------------] 15% eta: 5s Batch
## submitting query [=====>-------------------------] 18% eta: 5s Batch
## submitting query [======>------------------------] 21% eta: 5s Batch
## submitting query [=======>-----------------------] 24% eta: 4s Batch
## submitting query [=======>-----------------------] 27% eta: 5s Batch
## submitting query [========>----------------------] 30% eta: 4s Batch
## submitting query [=========>---------------------] 33% eta: 4s Batch
## submitting query [==========>--------------------] 36% eta: 4s Batch
## submitting query [===========>-------------------] 39% eta: 4s Batch
## submitting query [============>------------------] 42% eta: 4s Batch
## submitting query [=============>-----------------] 45% eta: 4s Batch
## submitting query [==============>----------------] 48% eta: 3s Batch
## submitting query [===============>---------------] 52% eta: 3s Batch
## submitting query [================>--------------] 55% eta: 3s Batch
## submitting query [=================>-------------] 58% eta: 3s Batch
## submitting query [==================>------------] 61% eta: 3s Batch
## submitting query [===================>-----------] 64% eta: 2s Batch
## submitting query [====================>----------] 67% eta: 2s Batch
## submitting query [=====================>---------] 70% eta: 2s Batch
## submitting query [======================>--------] 73% eta: 2s Batch
## submitting query [======================>--------] 76% eta: 2s Batch
## submitting query [=======================>-------] 79% eta: 1s Batch
## submitting query [========================>------] 82% eta: 1s Batch
## submitting query [=========================>-----] 85% eta: 1s Batch
## submitting query [==========================>----] 88% eta: 1s Batch
## submitting query [===========================>---] 91% eta: 1s Batch
## submitting query [============================>--] 94% eta: 0s Batch submitting
## query [=============================>-] 97% eta: 0s Batch submitting query
## [===============================] 100% eta: 0s
##
## 27 genes from top module #73B000 don't have an Entrez Gene ID
## 29 genes from top module #EE8045 don't have an Entrez Gene ID
## 27 genes from top module #00B9E3 don't have an Entrez Gene ID
## 25 genes from top module #84AD00 don't have an Entrez Gene ID
## Loading required package: org.Hs.eg.db
##
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## Using collections: enrichmentAnalysis: preparing data..
## ..working on label set 1 ..
kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, desc(enrichmentRatio)),
caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAM:003078 | Temporal Lobe_IN_Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.0678488 | 0.0000609 | 2.581215 | 1423 | 158 | 37 |
| JAMiller.AIBS.000169 | Cerebellum-enriched genes in midfetal human brain | JA Miller at AIBS|Brain|Prenatal brain|Brain region markers | 0.1676492 | 0.0001374 | 2.755622 | 1423 | 124 | 31 |
| GO:0072358 | cardiovascular system development | GO|GO.BP | 0.3613496 | 0.0002744 | 1.688783 | 1423 | 607 | 93 |
| GO:0072359 | circulatory system development | GO|GO.BP | 0.4257766 | 0.0003168 | 1.548309 | 1423 | 897 | 126 |
| GO:0001944 | vasculature development | GO|GO.BP | 0.7108773 | 0.0005035 | 1.674535 | 1423 | 599 | 91 |
| JAM:002886 | Hypothalamus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.0000000 | 0.0019483 | 2.423384 | 1423 | 141 | 31 |
| JAMiller.AIBS.000138 | VZ/SZ/IZ enriched in E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Germinal brain | 1.0000000 | 0.0013896 | 1.892415 | 1423 | 332 | 57 |
| GO:0001568 | blood vessel development | GO|GO.BP | 1.0000000 | 0.0013863 | 1.657227 | 1423 | 572 | 86 |
| JAMiller.AIBS.000502 | Genes bound by SUZ12 in mouse MESC from PMID 16625203 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 1.0000000 | 0.0014159 | 1.546771 | 1423 | 791 | 111 |
| JAMiller.AIBS.000208 | RegionalWGCNA midfetal M38 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 1.0000000 | 0.0016658 | 1.503067 | 1423 | 902 | 123 |
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, enrichmentRatio),
caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000176 | CortexWGCNA midfetal M6 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 0.0013071 | 0.0000016 | 1.928947 | 848 | 863 | 90 |
| GO:0010468 | regulation of gene expression | GO|GO.BP | 0.0318274 | 0.0000311 | 1.345662 | 848 | 3615 | 263 |
| GO:1903506 | regulation of nucleic acid-templated transcription | GO|GO.BP | 0.0330396 | 0.0000320 | 1.407197 | 848 | 2826 | 215 |
| GO:0010556 | regulation of macromolecule biosynthetic process | GO|GO.BP | 0.0358070 | 0.0000345 | 1.369898 | 848 | 3254 | 241 |
| GO:0051252 | regulation of RNA metabolic process | GO|GO.BP | 0.0389042 | 0.0000371 | 1.387082 | 848 | 3027 | 227 |
| GO:2001141 | regulation of RNA biosynthetic process | GO|GO.BP | 0.0429812 | 0.0000406 | 1.402730 | 848 | 2835 | 215 |
| GO:0006355 | regulation of transcription, DNA-templated | GO|GO.BP | 0.0610153 | 0.0000554 | 1.404287 | 848 | 2766 | 210 |
| GO:0019219 | regulation of nucleobase-containing compound metabolic process | GO|GO.BP | 0.0715698 | 0.0000637 | 1.358053 | 848 | 3296 | 242 |
| GO:0097659 | nucleic acid-templated transcription | GO|GO.BP | 0.1089999 | 0.0000931 | 1.377000 | 848 | 2982 | 222 |
| GO:2000112 | regulation of cellular macromolecule biosynthetic process | GO|GO.BP | 0.1185138 | 0.0001002 | 1.363767 | 848 | 3133 | 231 |
kable(enrichment$enrichmentTable %>% filter(class==top_modules[3]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, desc(enrichmentRatio)),
caption = paste0('Enriched terms for module ', top_modules[3], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[3]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000052 | CortexWGCNA 15-21 post-conception weeks C26 | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 2.708083 | 1695 | 721 | 211 |
| JAMiller.AIBS.000142 | Highest in CP of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 2.194883 | 1695 | 1210 | 287 |
| JAMiller.AIBS.000569 | WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 1.524703 | 1695 | 4036 | 665 |
| JAM:002769 | downAD_mitochondrion | JAM|BrainLists|BrainLists.Blalock_AD | 0.00e+00 | 0e+00 | 3.364977 | 1695 | 264 | 96 |
| JAMiller.AIBS.000150 | Highest in CP of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 1.878513 | 1695 | 1266 | 257 |
| JAM:003016 | downAD_synapticTransmission | JAM|BrainLists|BrainLists.Blalock_AD | 0.00e+00 | 0e+00 | 5.152621 | 1695 | 88 | 49 |
| JAMiller.AIBS.000155 | Lowest in VZ of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.00e+00 | 0e+00 | 1.723944 | 1695 | 1664 | 310 |
| JAMiller.AIBS.000123 | HippocampusWGCNA turquoise DGenriched upAge | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 0.00e+00 | 0e+00 | 1.921531 | 1695 | 1098 | 228 |
| JAMiller.AIBS.000570 | WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 2.481758 | 1695 | 481 | 129 |
| JAM:003072 | Tail of Caudate Nucleus_IN_Striatum | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 3.506055 | 1695 | 161 | 61 |
| GO:0044456 | synapse part | GO|GO.CC | 0.00e+00 | 0e+00 | 1.967674 | 1695 | 823 | 175 |
| GO:0045202 | synapse | GO|GO.CC | 0.00e+00 | 0e+00 | 1.834783 | 1695 | 1044 | 207 |
| GO:0097060 | synaptic membrane | GO|GO.CC | 0.00e+00 | 0e+00 | 2.418297 | 1695 | 375 | 98 |
| JAMiller.AIBS.000141 | CP enriched in E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 2.112789 | 1695 | 565 | 129 |
| GO:0097458 | neuron part | GO|GO.CC | 0.00e+00 | 0e+00 | 1.651046 | 1695 | 1418 | 253 |
| JAM:002764 | downAging_mitochondria_synapse | JAM|BrainLists|BrainLists.Lu_Aging | 0.00e+00 | 0e+00 | 2.349013 | 1695 | 390 | 99 |
| JAM:002751 | Basal Pons | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 3.196728 | 1695 | 165 | 57 |
| JAM:002744 | Autism_differential_expression_across_at_least_one_comparison | JAM|BrainLists|BrainLists.Voineagu | 0.00e+00 | 0e+00 | 1.855589 | 1695 | 763 | 153 |
| JAM:003054 | subiculum_IN_Hippocampal Formation | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 3.065639 | 1695 | 163 | 54 |
| JAMiller.AIBS.000005 | CPi markers at 21 post-conception weeks | JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Postmitotic brain | 1.00e-07 | 0e+00 | 2.395777 | 1695 | 309 | 80 |
| JAM:002805 | Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 2.00e-07 | 0e+00 | 2.988769 | 1695 | 161 | 52 |
| JAM:002739 | arcuate nucleus of medulla_IN_Myelencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 2.00e-07 | 0e+00 | 2.936799 | 1695 | 167 | 53 |
| JAM:002920 | Lateral Nucleus_IN_Amygdala | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 5.00e-07 | 0e+00 | 2.916314 | 1695 | 165 | 52 |
| GO:0099536 | synaptic signaling | GO|GO.BP | 5.00e-07 | 0e+00 | 1.949884 | 1695 | 560 | 118 |
| GO:0034220 | ion transmembrane transport | GO|GO.BP | 7.00e-07 | 0e+00 | 1.720897 | 1695 | 898 | 167 |
| GO:0098793 | presynapse | GO|GO.CC | 1.00e-06 | 0e+00 | 2.108154 | 1695 | 417 | 95 |
| JAMiller.AIBS.000095 | Cortical PNOC neurons | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex | 2.50e-06 | 0e+00 | 1.282364 | 1695 | 3940 | 546 |
| GO:0045211 | postsynaptic membrane | GO|GO.CC | 3.10e-06 | 0e+00 | 2.346005 | 1695 | 284 | 72 |
| GO:0099537 | trans-synaptic signaling | GO|GO.BP | 3.40e-06 | 0e+00 | 1.917431 | 1695 | 555 | 115 |
| JAM:002882 | Hippocampal Formation | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 4.00e-06 | 0e+00 | 2.809155 | 1695 | 168 | 51 |
| GO:0007268 | chemical synaptic transmission | GO|GO.BP | 4.10e-06 | 0e+00 | 1.918037 | 1695 | 550 | 114 |
| GO:0098916 | anterograde trans-synaptic signaling | GO|GO.BP | 4.10e-06 | 0e+00 | 1.918037 | 1695 | 550 | 114 |
| JAMiller.AIBS.000463 | Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 9.70e-06 | 0e+00 | 1.415427 | 1695 | 2079 | 318 |
| GO:0043005 | neuron projection | GO|GO.CC | 1.07e-05 | 0e+00 | 1.625648 | 1695 | 1036 | 182 |
| GO:0030424 | axon | GO|GO.CC | 1.10e-05 | 0e+00 | 1.942358 | 1695 | 505 | 106 |
| GO:0022836 | gated channel activity | GO|GO.MF | 2.52e-05 | 0e+00 | 2.420796 | 1695 | 237 | 62 |
| JAMiller.AIBS.000042 | CortexWGCNA 15-21 post-conception weeks C16 SPenriched | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 2.81e-05 | 0e+00 | 2.570469 | 1695 | 198 | 55 |
| JAM:002824 | Dentate Nucleus_IN_Cerebellar Nucleus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 5.67e-05 | 1e-07 | 2.725012 | 1695 | 163 | 48 |
| GO:0034702 | ion channel complex | GO|GO.CC | 5.72e-05 | 1e-07 | 2.437355 | 1695 | 224 | 59 |
| GO:0005216 | ion channel activity | GO|GO.MF | 6.05e-05 | 1e-07 | 2.256199 | 1695 | 283 | 69 |
| GO:0022839 | ion gated channel activity | GO|GO.MF | 6.28e-05 | 1e-07 | 2.391843 | 1695 | 236 | 61 |
| GO:1902495 | transmembrane transporter complex | GO|GO.CC | 7.84e-05 | 1e-07 | 2.361023 | 1695 | 243 | 62 |
| GO:0099240 | intrinsic component of synaptic membrane | GO|GO.CC | 8.40e-05 | 1e-07 | 2.764088 | 1695 | 154 | 46 |
kable(enrichment$enrichmentTable %>% filter(class==top_modules[4]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, desc(enrichmentRatio)),
caption = paste0('Enriched terms for module ', top_modules[4], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[4]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000124 | HippocampusWGCNA yellow | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 0.0000395 | 0.0000001 | 2.175436 | 852 | 677 | 80 |
| GO:0005759 | mitochondrial matrix | GO|GO.CC | 0.7763462 | 0.0005425 | 2.107644 | 852 | 428 | 49 |
| JAM:002985 | Parietal Lobe_IN_Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.0000000 | 0.0130730 | 3.012484 | 852 | 110 | 18 |
| GO:0098798 | mitochondrial protein complex | GO|GO.CC | 1.0000000 | 0.0016657 | 2.503063 | 852 | 228 | 31 |
| JAMiller.AIBS.000206 | RegionalWGCNA midfetal M36 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 1.0000000 | 0.0018009 | 2.492133 | 852 | 229 | 31 |
| GO:0005743 | mitochondrial inner membrane | GO|GO.CC | 1.0000000 | 0.0139251 | 1.914949 | 852 | 423 | 44 |
| JAMiller.AIBS.000048 | CortexWGCNA 15-21 post-conception weeks C22 CPenriched enrichedForAutismGenes | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 1.0000000 | 0.0009229 | 1.880390 | 852 | 607 | 62 |
| GO:0044429 | mitochondrial part | GO|GO.CC | 1.0000000 | 0.0016518 | 1.668186 | 852 | 927 | 84 |
| JAMiller.AIBS.000044 | CortexWGCNA 15-21 post-conception weeks C18 | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 1.0000000 | 0.0167403 | 1.666610 | 852 | 718 | 65 |
| GO:0005739 | mitochondrion | GO|GO.CC | 1.0000000 | 0.0089002 | 1.473823 | 852 | 1399 | 112 |
Option 1: Select the modules with the highest percentage of SFARI Genes
This is an easy approach but it doesn’t take into account the size of the modules (the bigger the module, the more robust the result), and since our top modules are quite small (40 and 174) this could be a problem
Perhaps an approach that incorporates the cluster size into the selection criteria would be better
# Prepare dataset
perc_SFARI_genes = dataset %>% mutate('hasSFARIscore' = !gene.score %in% c('None', 'Neuronal')) %>%
group_by(Module, MTcor, hasSFARIscore) %>% summarise(s=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>%
mutate(p=round(s/n*100,2)) %>% filter(hasSFARIscore & Module != 'gray') %>% arrange(desc(p))
perc_SFARI_genes %>% ggplot(aes(reorder(Module, -p), p)) + geom_bar(stat='identity', fill = perc_SFARI_genes$Module) +
xlab('Module') + ylab('% of SFARI Genes') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
keep_modules = perc_SFARI_genes$Module[1:2]
cat(paste0('Keeping top 2 modules: ', paste(keep_modules, collapse = ', ')))
## Keeping top 2 modules: #8195FF, #B79F00
Option 2: Assume independence between SFARI Genes and modules and calculate the probability of obtaining a proportion at least as big as the one found in each cluster given its size
Notation:
N = Number of genes (16147)
S = Number of SFARI genes (789)
For each cluster C:
n = Number of genes in cluster
s = Number of SFARI Genes in cluster
If we interpret the number of genes (\(n\)) in a cluster as \(n\) random draws without replacement from a finite population of size \(N\), and the number of SFARI genes in the cluster (\(k\)) as \(k\) successes in those \(n\) draws, where we know that \(N\) contains exactly \(K\) successes, then we can use the hypergeometric distribution to calculate the statistical significance of having drawn \(k\) successes out of \(n\) draws, and use this value to select the clusters with the highest statistical significance
We want to select the modules with the smallest probability, since this means that the probability of obtaining at least this number of SFARI genes by chance is small (the )
Note: Instead of calculating the probability we are using the log probability to differentiate better between the smallest probabilities
N = sum(perc_SFARI_genes$n)
S = sum(perc_SFARI_genes$s)
calc_prob = function(row){
s = row[4] %>% as.numeric
n = row[5] %>% as.numeric
prob = phyper(s,S,N-S,n, log.p = TRUE, lower.tail=FALSE)
return(prob)
}
perc_SFARI_genes$prob = apply(perc_SFARI_genes, 1, function(x) calc_prob(x))
perc_SFARI_genes = perc_SFARI_genes %>% arrange(prob)
perc_SFARI_genes %>% ggplot(aes(MTcor, prob, size=n)) + geom_point(color=perc_SFARI_genes$Module, alpha=0.5) + geom_smooth(color='#cccccc', se=FALSE) +
xlab('Module-Trait Correlation') + ylab('Hypergeometric (log)Probability') + theme_minimal() + theme(legend.position = 'none')
We are no longer selecting the modules with the highest percentage of SFARI genes, but instead we are pondering this by the size of the module
In the end we are keeping cluster #B79F00 as a top cluster but we are replacing #8195FF for #FE61CF, which has a lower percentage of SFARI genes but is a bigger cluster
perc_SFARI_genes %>% ggplot(aes(p, prob, size=n)) + geom_point(color=perc_SFARI_genes$Module, alpha=0.5) + geom_smooth(color='#cccccc', se=FALSE) +
xlab('% of SFARI Genes') + ylab('Hypergeometric (log)Probability') + theme_minimal() + theme(legend.position = 'none')
perc_SFARI_genes %>% ggplot(aes(reorder(Module, prob), prob)) + geom_bar(stat='identity', fill = perc_SFARI_genes$Module) +
xlab('Module') + ylab('(Hypergeometric (log)Probability') + theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
keep_modules = perc_SFARI_genes$Module[1:2]
cat(paste0('Keeping top 2 modules: ', paste(keep_modules, collapse = ', ')))
## Keeping top 2 modules: #B79F00, #FE61CF
rm(N,S,calc_prob)
kable(enrichment$enrichmentTable %>% filter(class==keep_modules[1]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, desc(enrichmentRatio)),
caption = paste0('Enriched terms for module ', keep_modules[1], ' (SFARI Genes = ',
round(perc_SFARI_genes$p[perc_SFARI_genes$Module==keep_modules[1]][1],4), '%)'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000569 | WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.0008823 | 0.0000011 | 1.850606 | 168 | 4036 | 80 |
| GO:0051252 | regulation of RNA metabolic process | GO|GO.BP | 0.2041583 | 0.0001640 | 1.881450 | 168 | 3027 | 61 |
| GO:0090304 | nucleic acid metabolic process | GO|GO.BP | 0.2908464 | 0.0002255 | 1.670297 | 168 | 4304 | 77 |
| GO:2000112 | regulation of cellular macromolecule biosynthetic process | GO|GO.BP | 0.2970930 | 0.0002299 | 1.847594 | 168 | 3133 | 62 |
| GO:0019219 | regulation of nucleobase-containing compound metabolic process | GO|GO.BP | 0.3413090 | 0.0002605 | 1.812876 | 168 | 3296 | 64 |
| GO:0010556 | regulation of macromolecule biosynthetic process | GO|GO.BP | 1.0000000 | 0.0007780 | 1.778891 | 168 | 3254 | 62 |
| GO:0031326 | regulation of cellular biosynthetic process | GO|GO.BP | 1.0000000 | 0.0007603 | 1.754842 | 168 | 3405 | 64 |
| GO:0009889 | regulation of biosynthetic process | GO|GO.BP | 1.0000000 | 0.0006901 | 1.748373 | 168 | 3471 | 65 |
| GO:0016070 | RNA metabolic process | GO|GO.BP | 1.0000000 | 0.0010016 | 1.683317 | 168 | 3827 | 69 |
| GO:0005634 | nucleus | GO|GO.CC | 1.0000000 | 0.0013611 | 1.475657 | 168 | 5884 | 93 |
kable(enrichment$enrichmentTable %>% filter(class==keep_modules[2]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, enrichmentRatio),
caption = paste0('Enriched terms for module ', keep_modules[2], ' (SFARI Genes = ',
round(perc_SFARI_genes$p[perc_SFARI_genes$Module==keep_modules[2]][1],4), '%)'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000188 | CortexWGCNA midfetal M18 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 0.0000000 | 0.0000000 | 7.058003 | 313 | 426 | 60 |
| JAMiller.AIBS.000195 | RegionalWGCNA midfetal M25 | JA Miller at AIBS|Brain|Prenatal brain|WGCNA | 0.0000000 | 0.0000000 | 8.176139 | 313 | 190 | 31 |
| JAMiller.AIBS.000048 | CortexWGCNA 15-21 post-conception weeks C22 CPenriched enrichedForAutismGenes | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 0.0000000 | 0.0000000 | 3.962714 | 313 | 607 | 48 |
| JAMiller.AIBS.000044 | CortexWGCNA 15-21 post-conception weeks C18 | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 0.0000097 | 0.0000000 | 3.140713 | 313 | 718 | 45 |
| JAMiller.AIBS.000268 | Genes bound by EGR1 in HUMAN ERYTHROLEUKEMIA from PMID 20690147 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.0001385 | 0.0000002 | 1.556912 | 313 | 4828 | 150 |
| JAMiller.AIBS.000155 | Lowest in VZ of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.0004462 | 0.0000006 | 2.138185 | 313 | 1664 | 71 |
| JAMiller.AIBS.000569 | WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.0006891 | 0.0000009 | 1.614107 | 313 | 4036 | 130 |
| JAMiller.AIBS.000257 | Genes bound by DMRT1 in MOUSE TESTES from PMID 23473982 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.0747682 | 0.0000662 | 1.969328 | 313 | 1654 | 65 |
| JAMiller.AIBS.000245 | Genes bound by CREB1 in RAT HIPPOCAMPUS from PMID 23762244 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.1714689 | 0.0001401 | 1.823144 | 313 | 2034 | 74 |
| JAMiller.AIBS.000124 | HippocampusWGCNA yellow | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 0.3129563 | 0.0002413 | 2.590714 | 313 | 677 | 35 |
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.8.2
## [2] BrainDiseaseCollection_1.00
## [3] anRichment_1.01-2
## [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
## [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [6] GenomicFeatures_1.36.4
## [7] GenomicRanges_1.36.1
## [8] GenomeInfoDb_1.20.0
## [9] anRichmentMethods_0.90-1
## [10] WGCNA_1.69
## [11] fastcluster_1.1.25
## [12] dynamicTreeCut_1.63-1
## [13] GO.db_3.8.2
## [14] AnnotationDbi_1.46.1
## [15] IRanges_2.18.3
## [16] S4Vectors_0.22.1
## [17] Biobase_2.44.0
## [18] BiocGenerics_0.30.0
## [19] biomaRt_2.40.5
## [20] knitr_1.28
## [21] doParallel_1.0.15
## [22] iterators_1.0.12
## [23] foreach_1.5.0
## [24] polycor_0.7-10
## [25] expss_0.10.2
## [26] GGally_1.5.0
## [27] gridExtra_2.3
## [28] viridis_0.5.1
## [29] viridisLite_0.3.0
## [30] RColorBrewer_1.1-2
## [31] dendextend_1.13.4
## [32] plotly_4.9.2
## [33] glue_1.3.2
## [34] reshape2_1.4.3
## [35] forcats_0.5.0
## [36] stringr_1.4.0
## [37] dplyr_0.8.5
## [38] purrr_0.3.3
## [39] readr_1.3.1
## [40] tidyr_1.0.2
## [41] tibble_3.0.0
## [42] ggplot2_3.3.0
## [43] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5
## [3] Hmisc_4.4-0 plyr_1.8.6
## [5] lazyeval_0.2.2 splines_3.6.3
## [7] crosstalk_1.1.0.1 BiocParallel_1.18.1
## [9] digest_0.6.25 htmltools_0.4.0
## [11] fansi_0.4.1 magrittr_1.5
## [13] checkmate_2.0.0 memoise_1.1.0
## [15] cluster_2.1.0 annotate_1.62.0
## [17] Biostrings_2.52.0 modelr_0.1.6
## [19] matrixStats_0.56.0 prettyunits_1.1.1
## [21] jpeg_0.1-8.1 colorspace_1.4-1
## [23] blob_1.2.1 rvest_0.3.5
## [25] haven_2.2.0 xfun_0.12
## [27] crayon_1.3.4 RCurl_1.98-1.1
## [29] jsonlite_1.6.1 genefilter_1.66.0
## [31] impute_1.58.0 survival_3.1-12
## [33] gtable_0.3.0 zlibbioc_1.30.0
## [35] XVector_0.24.0 DelayedArray_0.10.0
## [37] scales_1.1.0 mvtnorm_1.1-0
## [39] DBI_1.1.0 Rcpp_1.0.4
## [41] xtable_1.8-4 progress_1.2.2
## [43] htmlTable_1.13.3 foreign_0.8-76
## [45] bit_1.1-15.2 preprocessCore_1.46.0
## [47] Formula_1.2-3 htmlwidgets_1.5.1
## [49] httr_1.4.1 acepack_1.4.1
## [51] ellipsis_0.3.0 farver_2.0.3
## [53] pkgconfig_2.0.3 reshape_0.8.8
## [55] XML_3.99-0.3 nnet_7.3-14
## [57] dbplyr_1.4.2 locfit_1.5-9.4
## [59] labeling_0.3 tidyselect_1.0.0
## [61] rlang_0.4.5 munsell_0.5.0
## [63] cellranger_1.1.0 tools_3.6.3
## [65] cli_2.0.2 generics_0.0.2
## [67] RSQLite_2.2.0 broom_0.5.5
## [69] evaluate_0.14 yaml_2.2.1
## [71] bit64_0.9-7 fs_1.4.0
## [73] nlme_3.1-147 xml2_1.2.5
## [75] compiler_3.6.3 rstudioapi_0.11
## [77] curl_4.3 png_0.1-7
## [79] reprex_0.3.0 geneplotter_1.62.0
## [81] stringi_1.4.6 highr_0.8
## [83] lattice_0.20-41 Matrix_1.2-18
## [85] vctrs_0.2.4 pillar_1.4.3
## [87] lifecycle_0.2.0 data.table_1.12.8
## [89] bitops_1.0-6 rtracklayer_1.44.4
## [91] R6_2.4.1 latticeExtra_0.6-29
## [93] codetools_0.2-16 assertthat_0.2.1
## [95] SummarizedExperiment_1.14.1 DESeq2_1.24.0
## [97] withr_2.1.2 GenomicAlignments_1.20.1
## [99] Rsamtools_2.0.3 GenomeInfoDbData_1.2.1
## [101] mgcv_1.8-31 hms_0.5.3
## [103] grid_3.6.3 rpart_4.1-15
## [105] rmarkdown_2.1 lubridate_1.7.4
## [107] base64enc_0.1-3